Setup
devtools::load_all(here::here())
library(here)
library(haven)
library(readxl)
library(dplyr)
————-
Pilot
This is the data of 36 participants rating the vignettes on four dimensions.
Import
# Import the raw pilot ratings from the SPSS export
# (haven keeps the SPSS variable labels as attributes).
d_pilot_0 <- haven::read_sav(
here::here("data-raw", "00_raw", "0_pilot.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
# A single filter call with two comma-separated conditions is
# equivalent to the two chained calls used elsewhere in this file.
d_pilot_1 <- d_pilot_0 %>%
  dplyr::filter(!is.na(Consent), Consent == 1)
## Number of participants excluded based on Informed Consent
nrow(d_pilot_0) - nrow(d_pilot_1)
## [1] 1
####### Progress #######
# Keep only respondents who (nearly) completed the survey.
d_pilot_1_2 <- d_pilot_1 %>% dplyr::filter(Progress > 95)
## Number of participants excluded based on Progress
nrow(d_pilot_1) - nrow(d_pilot_1_2)
## [1] 32
####### Duration #######
# Exclude respondents whose completion time is an outlier. Judging by
# the printed output below, the RadLab helper uses 2x and 0.5x the
# median duration as the bounds -- confirm against the helper's source.
d_pilot_2 <- d_pilot_1_2 %>% RadLab::duration_exclusion("Duration__in_seconds_")
## Median duration: 845 seconds
## Upper bound: 1690 seconds
## Lower bound: 422.5 seconds
## Number of participants excluded: 8
## Warning: package 'ggplot2' was built under R version 4.3.2
Cleaning
####### Columns #######
# Only Selecting the Relevant Columns
# Columns 19:102 are the 84 rating columns -- 21 vignettes x 4
# dimensions, grouped per vignette (inferred from the seq(i, 84, 4)
# extraction in the Means section below).
d_pilot_3 <- d_pilot_2[,19:102]
# Converting the Columns to Numeric
# (haven imports SPSS variables as labelled vectors; as.numeric strips
# the labels so colMeans below works on plain numerics)
d_pilot <- data.frame(
lapply(d_pilot_3, as.numeric))
Means
Here, we calculate the mean rating of each vignette on the four dimensions.
# Calculating means and storing them:
# (one mean per vignette-dimension column; 84 values in total)
v.means_raw_pilot <- colMeans(d_pilot, na.rm = TRUE)
# Creating a data frame of means:
# 21 vignettes x (4 rating dimensions + a vignette id column).
v.means_pilot <- as.data.frame(
matrix(0,
ncol = 5,
nrow = 21))
colnames(v.means_pilot) <- c(
'excluded',
'injustice',
'personal',
'violence',
'vignette_id')
# Giving the order
v.means_pilot['vignette_id'] <- 1:21
# Loop to assign means:
# Column i (dimension i) takes every 4th raw mean starting at i,
# i.e. the i-th dimension of each of the 21 vignettes.
for (i in 1:4) {
v.means_pilot[i] <- v.means_raw_pilot[seq(i,84,4)]
}
Matching with Overview Excel
This is the hand-filled Excel file. It records the order of the vignettes in each study; if a new study is added, the file must be updated accordingly.
# Importing the Excel File
# (hand-maintained overview of which vignette appeared in which study)
studies_all <- readxl::read_excel(
  here::here("data-raw", "experiment_metadata.xlsx")
)
# Only selecting the actually used vignettes
studies_all <- studies_all[1:15, ]
# Min-max normalisation for the control variables:
# first restrict the pilot means to the vignettes that appear in the
# overview, then rescale each rating dimension to [0, 1].
v.means_pilot <- v.means_pilot %>%
  dplyr::filter(vignette_id %in% studies_all$Pilot)
v.means_pilot_normalised <- v.means_pilot %>%
  dplyr::mutate(dplyr::across(
    c(excluded, injustice, personal, violence),
    function(x) (x - min(x)) / (max(x) - min(x))))
v.means_pilot <- v.means_pilot_normalised
########## Vignette Names and Their ID ##########
# Join the normalised pilot means onto the study overview; the overview
# column "Pilot" holds the pilot vignette_id. Columns 7 onward of the
# result are used below as the per-study presentation orders.
vignettes_full <- base::merge(
v.means_pilot,
studies_all,
by.x = "vignette_id",
by.y = "Pilot")
########## Order: Studies ##########
# Columns 7:14 of vignettes_full hold, for studies 1-8, the position at
# which each vignette was shown (NA when the vignette was not used in
# that study). For every study k, build o_studyk: the pilot rating
# columns (2:6) plus that study's order column, restricted to the
# vignettes used in the study and sorted by presentation order.
# This loop replaces eight near-identical copy-pasted blocks; assign()
# creates the same o_study1..o_study8 globals the rest of the script
# expects.
for (k in 1:8) {
  order_col <- 6 + k
  o_k <- vignettes_full[!is.na(vignettes_full[[order_col]]), c(2:6, order_col)]
  o_k <- o_k[order(o_k[[6]]), ]
  colnames(o_k)[6] <- "order"
  assign(paste0("o_study", k), o_k)
}
# Drop the loop temporaries so the workspace matches the original script.
rm(o_k, order_col, k)
————-
Study 1 (2019-2020)
Import
# Importing data:
d_study1_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "1_hartog.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
d_study1_1 <- d_study1_0 %>%
dplyr::filter(!is.na(Consent)) %>%
dplyr::filter(Consent == 1)
## Number of participants excluded based on Informed Consent
nrow(d_study1_0) - nrow(d_study1_1)
## [1] 0
####### Progress #######
# Keep only respondents who (nearly) completed the survey.
d_study1_1_2 <- d_study1_1 %>%
dplyr::filter(Progress > 95)
## Number of participants excluded based on Progress
nrow(d_study1_1) - nrow(d_study1_1_2)
## [1] 23
####### Duration #######
# Outlier-based exclusion on completion time (see the pilot section for
# the bounds the helper prints).
d_study1_2 <- d_study1_1_2 %>% RadLab::duration_exclusion("Duration__in_seconds_")
## Median duration: 1307.5 seconds
## Upper bound: 2615 seconds
## Lower bound: 653.75 seconds
## Number of participants excluded: 35
Cleaning
####### Columns #######
# Only selecting the relevant columns. The numeric indices refer to
# positions in the raw SPSS export: 70:129 are the 60 vignette rating
# columns (15 vignettes x 4 action options -- see the means plot below).
d_study1_3 <- d_study1_2 %>%
  subset(select = c(70:129, # Vignettes
                    139,    # Condition
                    130,    # Gender
                    131,    # Age
                    34,     # pol_liberal_conserv
                    35      # pol_left_right
  ))
# Assigning a dataset-wide participant ID: offset by the number of
# participants in all previous studies (0 here -- this is the first
# study). seq_len() replaces 1:nrow() so an empty data frame would give
# integer(0) rather than the erroneous c(1, 0).
d_study1_3$ID <- 0 + seq_len(nrow(d_study1_3))
# Assigning experiment number to all participants in this dataset
d_study1_3$experiment <- 1
# Converting the columns to the types the downstream helpers expect
d_study1 <- RadLab::enforce_variable_types(d_study1_3)
Means plot
# Sanity check on the 60 rating columns: the helper's message below
# confirms each vignette's four action options sum to 100 per
# participant (they are percentage allocations).
d_study1[,1:60] %>% RadLab::plot_vignette_means(
name_data = o_study1,
n_vignettes = 15)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
# Reshape from one row per participant (wide) to one row per
# participant-vignette (long); name_data supplies the vignette
# names/order built in the pilot section (o_study1).
d_study1_long_0 <- d_study1 %>% RadLab::wide_to_long(
name_data = o_study1,
n_vignettes = 15
)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study1_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition
# NOTE(review): haven::labelled_spss() is called with a single argument
# here, which just re-wraps the vector before factor(); confirm this is
# intentional (factor(EmpathyCondition) would likely behave the same).
d_study1_long_0 <- d_study1_long_0 %>%
dplyr::mutate(
condition_f = factor(
haven::labelled_spss(EmpathyCondition),
levels = 1:2,
labels = paste("Condition", 1:2)))
# Gender
# NOTE(review): here 1 = Female, 2 = Male, while studies 3-7 map
# 1 = Male -- confirm both codings against the questionnaires.
d_study1_long_0 <- d_study1_long_0 %>%
dplyr::mutate(
gender_f = factor(
haven::labelled_spss(gender),
levels = 1:2,
labels = c("Female", "Male")))
# Taking out the already converted variables
d_study1_long <- d_study1_long_0 %>% subset(select = -c(EmpathyCondition, gender))
Save
# Drop the political-orientation variables before saving the cleaned set.
d_study1 <- d_study1_long %>% subset(select = -c(
pol_left_right,
pol_liberal_conserv))
save(d_study1, file = here::here("data-raw", "01_cleaned", "d_study1.rda"))
————-
Study 2 (2020-2021)
Import
d_study2_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "2_meike.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
# A single filter call with two comma-separated conditions is
# equivalent to the two chained calls used elsewhere in this file.
d_study2_1 <- d_study2_0 %>%
  dplyr::filter(!is.na(Consent), Consent == 1)
## Number of participants excluded based on Informed Consent
nrow(d_study2_0) - nrow(d_study2_1)
## [1] 0
####### Progress #######
# Keep only respondents who (nearly) completed the survey.
d_study2_1_2 <- d_study2_1 %>% dplyr::filter(Progress > 95)
## Number of participants excluded based on Progress
nrow(d_study2_1) - nrow(d_study2_1_2)
## [1] 4
####### Duration #######
d_study2_2 <- d_study2_1_2 %>% RadLab::duration_exclusion("Duration_seconds")
## Median duration: 1626.5 seconds
## Upper bound: 3253 seconds
## Lower bound: 813.25 seconds
## Number of participants excluded: 20
Cleaning
Specifically for this dataset, Meike included an "exclusion" and a "fair" measure in between the vignette measures. These make it difficult to select the relevant columns by position, so those columns are deleted from the dataset first.
####### Columns #######
# Only Selecting the Relevant Columns
# NOTE(review): the pattern matches both "exclusion_fair" and the
# misspelled "eclusion_fair" -- presumably some raw column names carry
# that typo; confirm against the .sav file before simplifying.
d_study2_3 <- d_study2_2 %>%
subset(select = -grep("exclusion_fair|eclusion_fair", names(.))) %>%
subset(select = -c(40:45)) %>%
subset(select = c(20:79,
3, # Condition
80, # Gender (gender)
81, # Age (age)
103, # liberal_conservative_rec
104 # left_right_rec
))
# Assigning ID to the participants:
# IDs continue after the last ID of study 1 so they stay unique across
# the pooled dataset.
d_study2_3$ID <-
# Number of Participants in Previous Studies:
length(unique(d_study1$ID)) +
# Number of participants in this study:
1:nrow(d_study2_3)
# Assigning experiment number to all participants in this dataset
d_study2_3$experiment <- 2
# Converting the Columns to Numeric
d_study2 <- RadLab::enforce_variable_types(d_study2_3)
Means plot
d_study2[, 1:60] %>% RadLab::plot_vignette_means(
name_data = o_study2,
n_vignettes = 15)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
d_study2_long_0 <- d_study2 %>% RadLab::wide_to_long(
name_data = o_study2,
n_vignettes = 15)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study2_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition
d_study2_long_0 <- d_study2_long_0 %>%
dplyr::mutate(
condition_f = factor(
haven::labelled_spss(Condition),
levels = 1:3,
labels = paste("Condition", 1:3)))
# Gender
d_study2_long_0 <- d_study2_long_0 %>%
dplyr::mutate(
gender_f = factor(
haven::labelled_spss(gender),
levels = 1:2,
labels = c("Female", "Male")))
# Taking out the already converted variables
d_study2_long <- d_study2_long_0 %>% subset(select = -c(Condition, gender))
Save
d_study2 <- d_study2_long %>% subset(select = -c(
liberal_conservative_rec,
left_right_rec))
save(d_study2, file = here::here("data-raw", "01_cleaned", "d_study2.rda"))
————-
Study 3 (2020-2021)
Import
# Importing data:
d_study3_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "3_julius.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
# A single filter call with two comma-separated conditions is
# equivalent to the two chained calls used elsewhere in this file.
d_study3_1 <- d_study3_0 %>%
  dplyr::filter(!is.na(Consent), Consent == 1)
## Number of participants excluded based on Informed Consent
nrow(d_study3_0) - nrow(d_study3_1)
## [1] 0
####### Progress #######
# Keep only respondents who (nearly) completed the survey.
d_study3_1_2 <- d_study3_1 %>% dplyr::filter(Progress > 95)
## Number of participants excluded based on Progress
nrow(d_study3_1) - nrow(d_study3_1_2)
## [1] 2
####### Duration #######
d_study3_2 <- d_study3_1_2 %>% RadLab::duration_exclusion("Duration__in_seconds_")
## Median duration: 1605 seconds
## Upper bound: 3210 seconds
## Lower bound: 802.5 seconds
## Number of participants excluded: 15
Cleaning
# Assigning ID to the participants:
# (continue numbering after studies 1 and 2 so IDs stay unique overall)
d_study3_2$ID <-
# Number of Participants in Previous Studies:
length(unique(d_study1$ID)) +
length(unique(d_study2$ID)) +
# Number of participants in this study:
1:nrow(d_study3_2)
####### Cleaning & Columns #######
# Only Selecting the Relevant Columns
# Keep the ID plus the vignette items of both question versions
# (P1_Q... / P0_Q...), dropping the "e_"-containing columns.
# NOTE(review): grep("e_", ...) drops ANY name containing "e_", not
# just an "e_" prefix -- confirm no wanted column matches.
d_study3_cleaning_1 <- d_study3_2 %>%
subset(select = grep("ID|P1_Q|P0_Q", names(.))) %>%
subset(select = -grep("e_", names(.)))
# Combining Two Conditions
# Presumably each participant answered either the P1_ or the P0_
# version of item Qi_j; summing the pair with NAs ignored collapses the
# two condition columns into a single Qi_j column.
for (i in 1:15){
for (j in 1:4){
d_study3_cleaning_1 <- d_study3_cleaning_1 %>% RadLab::add_row_sums(
var_name = paste("Q", i, "_", j,
sep = ""),
# Items to sum up: every existing column whose name contains
# "Qi_j" (both the P1_ and the P0_ version)
item_names = c(
grep(
paste("Q",i,"_",j,
sep = ""),
names(d_study3_cleaning_1),
value = TRUE)),
ignore_na = T)
}
}
# Getting Rid of these previous Columns
d_study3_cleaning_2 <- d_study3_cleaning_1 %>% subset(
select = -grep("P1|P0", names(.)))
# Demographics
d_study3_demographics <- d_study3_2 %>%
subset(select = c(
ID,
Condition, # Condition
Gen_2, # Gender
age_2, # Age
Pol1_2, # pol_liberal_conserv
Pol2_2 # pol_left_right
))
# Combining
d_study3_3 <- dplyr::left_join(
d_study3_cleaning_2,
d_study3_demographics,
by = "ID")
# Relocating ID
d_study3_4 <- d_study3_3 %>% dplyr::relocate("ID", .after = "Q15_4")
# Assigning experiment number to all participants in this dataset
d_study3_4$experiment <- 3
# Converting the Columns to Numeric
d_study3 <- RadLab::enforce_variable_types(d_study3_4)
Means plot
d_study3[, 1:60] %>% RadLab::plot_vignette_means(
name_data = o_study3,
n_vignettes = 15)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
d_study3_long_0 <- d_study3 %>% wide_to_long(
name_data = o_study3,
n_vignettes = 15)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study3_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition
d_study3_long_0 <- d_study3_long_0 %>%
dplyr::mutate(
condition_f = factor(
haven::labelled_spss(Condition),
levels = 0:1,
labels = paste("Condition", 1:2)))
# Gender
d_study3_long_0 <- d_study3_long_0 %>%
dplyr::mutate(
gender_f = factor(
haven::labelled_spss(Gen_2),
levels = 1:2,
labels = c("Female", "Male"))) # Checked
# Age
d_study3_long_0$age <- d_study3_long_0$age_2
# Taking out the already converted variables
d_study3_long <- d_study3_long_0 %>% subset(select = -c(Condition, Gen_2, age_2))
Save
d_study3 <- d_study3_long %>% subset(select = -c(
Pol1_2,
Pol2_2))
save(d_study3, file = here::here("data-raw", "01_cleaned", "d_study3.rda"))
————-
Study 4 (2020-2021)
Import
# Importing data:
d_study4_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "4_barbara.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
# Consent is stored as the literal answer string in this export; a
# single filter call with two conditions matches the chained version.
d_study4_1 <- d_study4_0 %>%
  dplyr::filter(!is.na(ConsentForm),
                ConsentForm == "Yes, I agree to participate in this study.")
## Number of participants excluded based on Informed Consent
nrow(d_study4_0) - nrow(d_study4_1)
## [1] 0
####### Progress #######
## No Progress in the dataset
####### Duration #######
## No duration in the dataset
# Just for the sake of keeping the code same as the other studies:
d_study4_2 <- d_study4_1
Cleaning
Cleaning the columns in this study is a little tougher than in the
other studies. Two things need to happen:
- Combining the conditions in the 8th vignette.
- Summing the masculine and feminine ENNA options (i.e., action options 4 and
5), since we are only interested in ENNA in general and not in the separate
versions.
########## Assigning ID ##########
# Assigning ID to the participants:
# (continue numbering after studies 1-3 so IDs stay unique overall)
d_study4_2$ID <-
# Number of Participants in Previous Studies:
length(unique(d_study1$ID)) +
length(unique(d_study2$ID)) +
length(unique(d_study3$ID)) +
# Number of participants in this study:
1:nrow(d_study4_2)
########## Q8 ##########
# Separating Q8
# (Q8's columns all contain "flyer"/"Flyer"; it was shown in 6 conditions)
d_study4_Q8 <- d_study4_2 %>%
subset(
select = c(
grep("flyer|Flyer", names(.)),
ID))
# Q8 Combining the 6 Conditions into 5 new variables
# (one summed column per action option i, NAs ignored)
# NOTE(review): grep(i, ...) matches the digit i ANYWHERE in a column
# name, not just as the action-option suffix. This is only correct if
# the flyer column names contain no other digit equal to i (e.g. a
# condition number). Confirm against the raw column names, and consider
# anchoring the pattern, e.g. paste0("_", i, "$").
for(i in 1:5){
d_study4_Q8 <- d_study4_Q8 %>% RadLab::add_row_sums(
var_name = paste("Q8", "flyer", i, sep = "_"),
item_names = colnames(d_study4_Q8)[
grep(i, names(d_study4_Q8))],
ignore_na = T)
}
########## Action Options 4 & 5 ##########
# Selecting Only the Relevant Columns:
d_study4_cleaning_0 <- d_study4_2 %>%
subset(
select = c(
grep(paste("Q", 1:16, sep = "", collapse = "|"),names(.)),
ID)) %>%
# Taking out these "_mean_" variables:
subset(select = -grep("_mean_", names(.))) %>%
# Not having the Q8:
# (Q8 is re-attached below from the summed flyer columns)
subset(select = -grep("flyer|Flyer", names(.)))
# Adding the 5 sum variables of Q8
d_study4_cleaning_1 <- dplyr::left_join(
d_study4_cleaning_0,
d_study4_Q8[, c("Q8_flyer_1",
"Q8_flyer_2",
"Q8_flyer_3",
"Q8_flyer_4",
"Q8_flyer_5",
"ID")],
by = "ID")
# Combining Action Options 4 & 5 for all vignettes
# Options 4 (masculine ENNA) and 5 (feminine ENNA) are summed into one
# "_ENNA" column per vignette, since only ENNA in general is of
# interest (see the section note above).
for (i in 1:16){
# Extracting the vignette name
# (column names look like "Q<i>_<name>_<option>"; take <name> from the
# first matching column)
questions <- grep(
paste("Q", i, "_", sep = ""),
names(d_study4_cleaning_1),
value = TRUE)
vignette_name <- strsplit(questions[1],"_")[[1]][2]
# Creating the Variable
d_study4_cleaning_1 <- d_study4_cleaning_1 %>% RadLab::add_row_sums(
var_name = paste("Q", i, "_",
vignette_name,
"_ENNA", sep = ""),
# Items to sum up
item_names = c(
# Option 4
grep(
paste("Q",i,"_",".*","_4", sep = ""),
names(d_study4_cleaning_1),
value = TRUE),
# Option 5
grep(
paste("Q",i,"_",".*","_5", sep = ""),
names(d_study4_cleaning_1),
value = TRUE)),
ignore_na = T)
}
# Taking out the action options 4 & 5
# This also removes the Q8_flyer_4/5 sums, which is intended -- the
# "_ENNA" totals are kept and renamed to "_4" just below.
d_study4_cleaning_2 <- d_study4_cleaning_1 %>%
subset(select = -grep("_4|_5", names(.)))
# Changing the ENNA column names to "4"
colnames(d_study4_cleaning_2) <- gsub(
"_ENNA",
"_4",
colnames(d_study4_cleaning_2))
########## Ordering ##########
# Saving only the column names to a new df
cn <- as.data.frame(colnames(d_study4_cleaning_2))
colnames(cn)[1] = "questions"
# creating order based on question number
cn <- cn %>%
dplyr::rowwise() %>%
dplyr::mutate(
vignette_number = as.numeric(
gsub(".*?([0-9]+).*", "\\1", questions)))
## Warning: There was 1 warning in `dplyr::mutate()`.
## ℹ In argument: `vignette_number = as.numeric(gsub(".*?([0-9]+).*", "\\1",
## questions))`.
## ℹ In row 46.
## Caused by warning:
## ! NAs introduced by coercion
# The coercion warning above presumably comes from the digit-free "ID"
# column; its vignette_number is NA, and order() sorts NA last, so ID
# ends up after the vignette columns -- which is what we want.
# Creating a nested order:
## 1) vignette number (1-16),
## then the action option number (1-4)
cn_ordered <- cn[order(cn$vignette_number, cn$questions),]
# Creating a new ordered df
d_study4_cleaning_3 <- d_study4_cleaning_2[cn_ordered$questions]
########## Demographics ##########
d_study4_demographics <- d_study4_2 %>%
subset(select = c(
ID,
Cond, # Condition (6)
GenderR, # Gender (Binary, already converted):
## Male == 0, Female == 1
Age, # Age
# **Missing** # Political Liberal - Conservative
PolOri # Political Left (1) - Right (10)
))
# Combining Demographics with Vignettes
d_study4_3 <- dplyr::left_join(
d_study4_cleaning_3,
d_study4_demographics,
by = "ID")
# Assigning experiment number to all participants in this dataset
d_study4_3$experiment <- 4
# Converting the Columns to Numeric
d_study4 <- RadLab::enforce_variable_types(d_study4_3)
Means Plot
d_study4[,1:64] %>% RadLab::plot_vignette_means(
name_data = o_study4,
n_vignettes = 16)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
d_study4_long_0 <- d_study4 %>% wide_to_long(
name_data = o_study4,
n_vignettes = 16)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study4_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition
d_study4_long_0 <- d_study4_long_0 %>%
dplyr::mutate(
condition_f = factor(
haven::labelled_spss(Cond),
levels = 1:6,
labels = paste("Condition", 1:6)))
# Gender
d_study4_long_0 <- d_study4_long_0 %>%
dplyr::mutate(
gender_f = factor(
haven::labelled_spss(GenderR),
levels = 0:1,
labels = c("Male", "Female"))) # Checked
# Age
d_study4_long_0$age <- d_study4_long_0$Age
# Taking out the already converted variables
d_study4_long <- d_study4_long_0 %>% subset(select = -c(Cond, GenderR, Age))
Save
d_study4 <- d_study4_long %>% subset(select = -c(
PolOri))
save(d_study4, file = here::here("data-raw", "01_cleaned", "d_study4.rda"))
————-
Study 5 (2021-2022)
Import
d_study5_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "5_brouwer_et_al.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
# Consent is stored in Q1 for this export; one filter call with two
# comma-separated conditions matches the chained version.
d_study5_1 <- d_study5_0 %>%
  dplyr::filter(!is.na(Q1), Q1 == 1)
## Number of participants excluded based on Informed Consent
nrow(d_study5_0) - nrow(d_study5_1)
## [1] 31
####### Progress #######
# Keep only respondents who (nearly) completed the survey.
d_study5_1_2 <- d_study5_1 %>% dplyr::filter(Progress > 95)
## Number of participants excluded based on Progress
nrow(d_study5_1) - nrow(d_study5_1_2)
## [1] 86
####### Duration #######
d_study5_1_3 <- d_study5_1_2 %>% RadLab::duration_exclusion("Duration__in_seconds_")
## Median duration: 1504.5 seconds
## Upper bound: 3009 seconds
## Lower bound: 752.25 seconds
## Number of participants excluded: 71
####### Incorrect data #######
# This response is probably made during testing
# Because we can see (later on) the action options do not sum up to a 100 for this specific participant.
d_study5_2 <- d_study5_1_3 %>% filter(ResponseId != "R_pyfF9CBCg3Zc35v")
## Check that we excluded that participant:
nrow(d_study5_1_3) - nrow(d_study5_2)
## [1] 1
Cleaning
Note: The vignettes 4-10 are three different conditions mixed together.
########## Assigning ID ##########
# Assigning ID to the participants:
# (offset by the participant counts of studies 1-4 so IDs stay unique
# across the pooled dataset)
d_study5_2$ID <-
# Number of Participants in Previous Studies:
length(unique(d_study1$ID)) +
length(unique(d_study2$ID)) +
length(unique(d_study3$ID)) +
length(unique(d_study4$ID)) +
# Number of participants in this study:
1:nrow(d_study5_2)
####### Columns #######
# Vignettes 4-10 are each split over three condition columns
# (Vig_<v>___Q_<q>, ...Q_<q>.0, ...Q_<q>.1); presumably each
# participant saw only one condition, so the row-wise sum with NAs
# ignored collapses the three into a single score (matching the
# original per-vignette mutate() stanzas). The four action options
# q = 1:4 map onto ina / na / nna / enna. This loop replaces seven
# near-identical copy-pasted blocks and creates the new columns in the
# exact same order (ina_4, na_4, ..., enna_10).
d_study5_3 <- d_study5_2
option_prefix <- c("ina", "na", "nna", "enna")
for (v in 4:10) {
  for (q in 1:4) {
    source_cols <- paste0("Vig_", v, "___Q_", q, c("", ".0", ".1"))
    d_study5_3[[paste0(option_prefix[q], "_", v)]] <-
      rowSums(d_study5_3[, source_cols], na.rm = TRUE)
  }
}
# Drop the loop temporaries so the workspace stays tidy.
rm(option_prefix, v, q, source_cols)
# Rename columns explicitly for Vignettes 1, 2, and 3
# (these apparently had a single condition column each, so only a
# rename onto the ina/na/nna/enna scheme is needed -- no summing)
d_study5_3 <- d_study5_3 %>%
rename(
# Vignette 1
ina_1 = Vig_1___Q_1,
na_1 = Vig_1___Q_2,
nna_1 = Vig_1___Q_3,
enna_1 = Vig_1___Q_4,
# Vignette 2
ina_2 = Vig_2___Q_1,
na_2 = Vig_2___Q_2,
nna_2 = Vig_2___Q_3,
enna_2 = Vig_2___Q_4,
# Vignette 3
ina_3 = Vig_3___Q_1,
na_3 = Vig_3___Q_2,
nna_3 = Vig_3___Q_3,
enna_3 = Vig_3___Q_4
)
# Keep the 40 renamed vignette columns in vignette-major order
# (ina / na / nna / enna within each vignette), followed by the
# identifier and demographics. The name vector is generated instead of
# written out by hand; the order is identical to the explicit list.
d_study5_4 <- d_study5_3 %>%
  dplyr::select(dplyr::all_of(c(
    paste0(rep(c("ina_", "na_", "nna_", "enna_"), times = 10),
           rep(1:10, each = 4)),
    "ID", "Gender", "Age", "Conditie"
  )))
# Check the final column names to confirm the order
colnames(d_study5_4)
## [1] "ina_1" "na_1" "nna_1" "enna_1" "ina_2" "na_2"
## [7] "nna_2" "enna_2" "ina_3" "na_3" "nna_3" "enna_3"
## [13] "ina_4" "na_4" "nna_4" "enna_4" "ina_5" "na_5"
## [19] "nna_5" "enna_5" "ina_6" "na_6" "nna_6" "enna_6"
## [25] "ina_7" "na_7" "nna_7" "enna_7" "ina_8" "na_8"
## [31] "nna_8" "enna_8" "ina_9" "na_9" "nna_9" "enna_9"
## [37] "ina_10" "na_10" "nna_10" "enna_10" "ID" "Gender"
## [43] "Age" "Conditie"
# Assigning experiment number to all participants in this dataset
d_study5_4$experiment <- 5
# Converting the Columns to Numeric
d_study5 <- RadLab::enforce_variable_types(d_study5_4)
Means Plot
d_study5[,1:40] %>% RadLab::plot_vignette_means(
name_data = o_study5,
n_vignettes = 10)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
d_study5_long_0 <- d_study5 %>% wide_to_long(
name_data = o_study5,
n_vignettes = 10)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study5_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition
d_study5_long_0 <- d_study5_long_0 %>%
dplyr::mutate(
condition_f = factor(
haven::labelled_spss(Conditie),
levels = 0:2,
labels = paste("Condition", 1:3)))
# Gender
d_study5_long_0 <- d_study5_long_0 %>%
dplyr::mutate(
gender_f = factor(
haven::labelled_spss(Gender),
levels = 1:2,
labels = c("Male", "Female"))) # Checked
# Age
d_study5_long_0$age <- d_study5_long_0$Age
# Taking out the already converted variables
d_study5_long <- d_study5_long_0 %>% subset(select = -c(Conditie, Gender, Age))
Save
d_study5 <- d_study5_long
save(d_study5, file = here::here("data-raw", "01_cleaned", "d_study5.rda"))
————-
Study 6 (2022-2023)
Import
# Importing data:
d_study6_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "6_cham_et_al.sav")
)
Exclusion
####### Informed Consent #######
# First excluding "Informed Consent is NA"
# Then based on "Informed Consent is 0"
# A single filter call with two comma-separated conditions is
# equivalent to the two chained calls used elsewhere in this file.
d_study6_1 <- d_study6_0 %>%
  dplyr::filter(!is.na(Consentyesno), Consentyesno == 1)
## Number of participants excluded based on Informed Consent
nrow(d_study6_0) - nrow(d_study6_1)
## [1] 0
####### Progress #######
# Keep only respondents who (nearly) completed the survey.
d_study6_1_2 <- d_study6_1 %>% dplyr::filter(Progress > 95)
## Number of participants excluded based on Progress
nrow(d_study6_1) - nrow(d_study6_1_2)
## [1] 0
####### Duration #######
d_study6_2 <- d_study6_1_2 %>% RadLab::duration_exclusion("Duration__in_seconds_")
## Median duration: 1062.5 seconds
## Upper bound: 2125 seconds
## Lower bound: 531.25 seconds
## Number of participants excluded: 39
Cleaning
########## Assigning ID ##########
# Assigning ID to the participants:
# (offset by the participant counts of studies 1-5 so IDs stay unique
# across the pooled dataset)
d_study6_2$ID <-
# Number of Participants in Previous Studies:
length(unique(d_study1$ID)) +
length(unique(d_study2$ID)) +
length(unique(d_study3$ID)) +
length(unique(d_study4$ID)) +
length(unique(d_study5$ID)) +
# Number of participants in this study:
1:nrow(d_study6_2)
####### Columns #######
# Only Selecting the Relevant Columns
# (all columns belonging to the eight vignettes, plus the ID)
d_study6_3 <- d_study6_2 %>%
subset(
select = c(grep("Vig1|Vig2|Vig3|Vig4|Vig5|Vig6|Vig7|Vig8",
names(.)),
ID))
########## Demographics ##########
d_study6_demographics <- d_study6_2 %>%
subset(select = c(
ID,
# Condition missing?, # Condition
Gender, # Gender -- coded 1: Male, 2: Female in the factorizing step below
Age # Age
# **Missing** # Political Liberal - Conservative
# **Missing** # Political Left - Right
))
# Combining Demographics with Vignettes
d_study6_4 <- dplyr::left_join(
d_study6_3,
d_study6_demographics,
by = "ID")
# Assigning experiment number to all participants in this dataset
d_study6_4$experiment <- 6
# Converting the Columns to Numeric
d_study6 <- RadLab::enforce_variable_types(d_study6_4)
Means Plot
d_study6[,1:32] %>% RadLab::plot_vignette_means(
name_data = o_study6,
n_vignettes = 8)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
d_study6_long_0 <- d_study6 %>% wide_to_long(
name_data = o_study6,
n_vignettes = 8)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study6_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition
#* No condition in this experiment.
d_study6_long_0$condition_f <- as.factor(0)
# Gender
d_study6_long_0 <- d_study6_long_0 %>%
dplyr::mutate(
gender_f = factor(
haven::labelled_spss(Gender),
levels = 1:2,
labels = c("Male", "Female"))) # Checked
# Age
d_study6_long_0$age <- d_study6_long_0$Age
# Taking out the already converted variables
d_study6_long <- d_study6_long_0 %>% subset(select = -c(Gender, Age))
Save
d_study6 <- d_study6_long
save(d_study6, file = here::here("data-raw", "01_cleaned", "d_study6.rda"))
————-
Study 7 (2022-2023)
Import
# Importing data:
d_study7_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "7_jordan.sav")
)
Exclusion
The exclusion steps appear to have been applied already; this file seems to contain the cleaned data (TODO: verify with the data provider).
d_study7_2 <- d_study7_0
Cleaning
########## Assigning ID ##########
# Assigning ID to the participants:
d_study7_2$ID <-
# Number of Participants in Previous Studies:
length(unique(d_study1$ID)) +
length(unique(d_study2$ID)) +
length(unique(d_study3$ID)) +
length(unique(d_study4$ID)) +
length(unique(d_study5$ID)) +
length(unique(d_study6$ID)) +
# Number of participants in this study:
1:nrow(d_study7_2)
####### Columns #######
# Load necessary libraries
library(dplyr)
library(stringr)
# Rename function for Study 7: maps raw columns named
# "Q<vignette>_<option>" onto the shared naming scheme, where option
# suffix 1/2/3/4 becomes prefix ina/na/nna/enna ("<prefix>_<vignette>").
# Columns matching "^Q[0-9]+_" but lacking a purely numeric 1-4 suffix
# are left untouched. Base-R regexes replace the previous stringr +
# sapply approach: sapply's return type depends on its input, while
# vapply is type-stable, and the stringr dependency is no longer needed.
rename_study7_columns <- function(data) {
  # Candidate columns: start with "Q<number>_"
  question_cols <- grep("^Q[0-9]+_", names(data), value = TRUE)
  option_prefix <- c("1" = "ina", "2" = "na", "3" = "nna", "4" = "enna")
  new_names <- vapply(question_cols, function(col) {
    vignette <- sub("^Q([0-9]+)_.*$", "\\1", col)  # number right after "Q"
    option <- sub("^.*_([0-9]+)$", "\\1", col)     # trailing number, if any
    if (option %in% names(option_prefix)) {
      paste(option_prefix[[option]], vignette, sep = "_")
    } else {
      col  # no recognised option suffix: leave the name unchanged
    }
  }, character(1), USE.NAMES = FALSE)
  # grep() preserves the original column order, so this positional
  # assignment lines up with new_names.
  names(data)[names(data) %in% question_cols] <- new_names
  data
}
# Apply renaming function on Study 7 data
d_study7_3 <- rename_study7_columns(d_study7_2)
# Reorder columns manually to preserve the order of "ina", "na", "nna", "enna"
# (vignette-major order, presumably the layout plot_vignette_means and
# wide_to_long expect -- it matches the other studies)
d_study7_4 <- d_study7_3 %>%
dplyr::select(
ina_1, na_1, nna_1, enna_1,
ina_2, na_2, nna_2, enna_2,
ina_3, na_3, nna_3, enna_3,
ina_4, na_4, nna_4, enna_4,
ina_5, na_5, nna_5, enna_5,
ina_6, na_6, nna_6, enna_6,
ina_7, na_7, nna_7, enna_7,
ID, Gender, Age
)
# Check the resulting dataset to ensure only relevant columns are selected
colnames(d_study7_4)
## [1] "ina_1" "na_1" "nna_1" "enna_1" "ina_2" "na_2" "nna_2" "enna_2"
## [9] "ina_3" "na_3" "nna_3" "enna_3" "ina_4" "na_4" "nna_4" "enna_4"
## [17] "ina_5" "na_5" "nna_5" "enna_5" "ina_6" "na_6" "nna_6" "enna_6"
## [25] "ina_7" "na_7" "nna_7" "enna_7" "ID" "Gender" "Age"
# Assigning experiment number to all participants in this dataset
d_study7_4$experiment <- 7
# Converting the Columns to Numeric
d_study7 <- RadLab::enforce_variable_types(d_study7_4)
Means Plot
d_study7[,1:28] %>% RadLab::plot_vignette_means(
name_data = o_study7,
n_vignettes = 7)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
d_study7_long_0 <- d_study7 %>% wide_to_long(
name_data = o_study7,
n_vignettes = 7)
Means Plot (2)
RadLab::plot_vignette_analysis(
d_study7_long_0,
error_type = "sd"
)
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition: this study has a single condition, so everyone is coded 0
d_study7_long_0$condition_f <- as.factor(0)
# Gender: recode the SPSS-coded variable (1 = Male, 2 = Female) and
# carry age over to a lowercase column in one mutate call
d_study7_long_0 <- d_study7_long_0 %>%
  dplyr::mutate(
    gender_f = factor(
      haven::labelled_spss(Gender),
      levels = 1:2,
      labels = c("Male", "Female")), # Checked
    age = Age)
# Drop the raw variables that were converted above
d_study7_long <- d_study7_long_0 %>% dplyr::select(-Gender, -Age)
Save
# Overwrite d_study7 with the cleaned long-format data and save it to disk
d_study7 <- d_study7_long
save(d_study7, file = here::here("data-raw", "01_cleaned", "d_study7.rda"))
————-
Study 8 (2023-2024)
Import
# Importing data:
# Study 8 raw SPSS export (file name suggests the Deree sample)
d_study8_0 <- haven::read_spss(
here::here("data-raw", "00_raw", "8_deree.sav")
)
Exclusion
The exclusion steps appear to have been applied already; this file seems to contain the cleaned data (to be confirmed with the data provider).
d_study8_2 <- d_study8_0
Cleaning
########## Assigning ID ##########
# Assigning ID to the participants:
# IDs continue the running count across studies: a participant's ID is the
# total number of unique participants in Studies 1-7 plus their row number.
# The offset is computed explicitly first — the original expression relied
# on `:` binding tighter than `+` — and seq_len() replaces 1:nrow().
n_prior_participants <-
  length(unique(d_study1$ID)) +
  length(unique(d_study2$ID)) +
  length(unique(d_study3$ID)) +
  length(unique(d_study4$ID)) +
  length(unique(d_study5$ID)) +
  length(unique(d_study6$ID)) +
  length(unique(d_study7$ID))
d_study8_2$ID <- n_prior_participants + seq_len(nrow(d_study8_2))
####### Columns #######
# Load necessary libraries
library(dplyr)
library(stringr)
# Function to rename columns based on the pattern
# Rename Study 8 vignette columns from Qualtrics-style names ("V<k>_<s>")
# to dimension-prefixed names ("<prefix>_<k>"), where suffix s in 1:4 maps
# to the four rated dimensions: ina, na, nna, enna.
#
# Rewritten with vectorized base R: the per-column mapply/if-else chain and
# the stringr/sapply extraction are replaced by sub() and a lookup vector.
#
# @param data A data.frame whose vignette columns match "^V[0-9]+_".
# @return The same data.frame with vignette columns renamed; other columns,
#   and vignette columns with an unrecognized suffix, are left unchanged.
rename_study8_columns <- function(data) {
  # Suffix -> dimension prefix lookup (replaces the if/else chain)
  prefix_by_suffix <- c("1" = "ina", "2" = "na", "3" = "nna", "4" = "enna")
  # Identify columns that start with "V" followed by the vignette number
  vignette_cols <- grep("^V[0-9]+_", names(data), value = TRUE)
  # Extract the vignette number (digits right after the leading "V")
  vignette_numbers <- sub("^V([0-9]+)_.*$", "\\1", vignette_cols)
  # Extract the trailing digits (the rated dimension, expected 1-4);
  # if the name does not end in digits, sub() returns it unchanged
  suffixes <- sub("^.*_([0-9]+)$", "\\1", vignette_cols)
  known <- suffixes %in% names(prefix_by_suffix)
  # Leave columns with an unrecognized suffix unchanged
  new_names <- vignette_cols
  new_names[known] <- paste(
    prefix_by_suffix[suffixes[known]],
    vignette_numbers[known],
    sep = "_")
  # Rename columns in data
  names(data)[match(vignette_cols, names(data))] <- new_names
  return(data)
}
# Apply renaming function on Study 8 data
d_study8_3 <- rename_study8_columns(d_study8_2)
# Select relevant columns in the specified order
d_study8_4 <- d_study8_3 %>%
dplyr::select(
ina_1, na_1, nna_1, enna_1,
ina_2, na_2, nna_2, enna_2,
ina_3, na_3, nna_3, enna_3,
ina_4, na_4, nna_4, enna_4,
ina_5, na_5, nna_5, enna_5,
ina_6, na_6, nna_6, enna_6,
ID, gen, age
)
# Assign IDs, Experiment Number, and select columns
d_study8_4$experiment <- 8
# Check the column names to confirm correct renaming
colnames(d_study8_4)
## [1] "ina_1" "na_1" "nna_1" "enna_1" "ina_2"
## [6] "na_2" "nna_2" "enna_2" "ina_3" "na_3"
## [11] "nna_3" "enna_3" "ina_4" "na_4" "nna_4"
## [16] "enna_4" "ina_5" "na_5" "nna_5" "enna_5"
## [21] "ina_6" "na_6" "nna_6" "enna_6" "ID"
## [26] "gen" "age" "experiment"
# Convert columns to numeric if necessary
d_study8 <- RadLab::enforce_variable_types(d_study8_4)
Means Plot
# Sanity-check plot of the per-vignette means on the cleaned wide data
# (first 24 columns are the 6 vignettes x 4 dimensions)
RadLab::plot_vignette_means(
  d_study8[, 1:24],
  name_data = o_study8,
  n_vignettes = 6)
## [1] "all rows sum up to a 100, so everything is good!"
Data preparation
Structuring the data
# Reshape the wide per-vignette columns into long format
d_study8_long_0 <- wide_to_long(
  d_study8,
  name_data = o_study8,
  n_vignettes = 6)
Means Plot (2)
# Second means plot, now on the long-format data, with SD error bands
d_study8_long_0 %>%
  RadLab::plot_vignette_analysis(error_type = "sd")
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## $ribbon
##
## $faceted
##
## $bars
Factorizing
##### Factorizing #####
# Condition: this study has a single condition, so everyone is coded 0
d_study8_long_0$condition_f <- as.factor(0)
# Gender: recode the SPSS-coded variable (1 = Male, 2 = Female)
d_study8_long_0 <- d_study8_long_0 %>%
  dplyr::mutate(
    gender_f = factor(
      haven::labelled_spss(gen),
      levels = 1:2,
      labels = c("Male", "Female"))) # Checked
# Age: already in usable form, so no conversion needed
# Drop the raw gender variable that was converted above
d_study8_long <- d_study8_long_0 %>% dplyr::select(-gen)
Save
# Overwrite d_study8 with the cleaned long-format data and save it to disk
d_study8 <- d_study8_long
save(d_study8, file = here::here("data-raw", "01_cleaned", "d_study8.rda"))